# --- Imports (same relative order as before) ---
# Plotly graph objects and the Chart Studio client
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper around plotly
import cufflinks
# Data science stack
import pandas as pd
import numpy as np
# Notebook helpers
from IPython.core.interactiveshell import InteractiveShell
from plotly.offline import iplot, init_notebook_mode

# --- Notebook configuration ---
# Allow up to 30 columns when pandas renders a frame
pd.options.display.max_columns = 30
# Display the value of every top-level expression in a cell, not just the last
InteractiveShell.ast_node_interactivity = 'all'
# Put cufflinks and plotly into offline notebook mode
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# Global cufflinks theme
cufflinks.set_config_file(world_readable=True, theme='pearl')
Run the model on the test set and denormalise its predictions.
from src.load_datasets import load_datasets
from src.prepare_datasets import add_indicators

# load_datasets() returns a (train, test) pair; only `test` is used in this cell.
train, test = load_datasets()

# pop() removes the 'timestamp' column from the frame; its values, read as
# milliseconds, become the datetime index.
timestamps = test.pop('timestamp')
test.index = pd.to_datetime(timestamps, unit='ms')

# Run add_indicators over the frame, then discard every row that contains a NaN.
# NOTE(review): add_indicators presumably appends the MACD / Stochastics
# Oscillator / ATR columns visible in the cell output — confirm in src.prepare_datasets.
test = add_indicators(test).dropna()
test
# Plot every 60th row, with subplots enabled.
test.iloc[::60].iplot(subplots=True)
| open | high | low | close | volume | MACD | Stochastics Oscillator | ATR | |
|---|---|---|---|---|---|---|---|---|
| timestamp | ||||||||
| 2020-04-22 13:16:00 | 6964.000000 | 6964.0 | 6963.977331 | 6964.0 | 5.338192 | -0.354599 | 82.026144 | 1.579205 |
| 2020-04-22 13:17:00 | 6964.000000 | 6980.0 | 6963.999575 | 6980.0 | 27.874409 | 0.692947 | 88.235294 | 2.609292 |
| 2020-04-22 13:18:00 | 6978.775679 | 6991.6 | 6978.775679 | 6991.6 | 26.115153 | 2.031269 | 94.117647 | 3.338937 |
| 2020-04-22 13:19:00 | 6991.600000 | 7025.7 | 6990.700000 | 7013.8 | 165.308843 | 4.153526 | 94.341417 | 5.600442 |
| 2020-04-22 13:20:00 | 7014.600000 | 7050.0 | 7013.900000 | 7035.4 | 69.579311 | 6.608127 | 89.186050 | 7.786124 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-03-09 18:50:00 | 53725.000000 | 53727.0 | 53725.000000 | 53727.0 | 1.119537 | -18.280136 | 0.294985 | 34.199619 |
| 2021-03-09 18:51:00 | 53780.000000 | 53780.0 | 53779.000000 | 53779.0 | 0.010000 | -14.792633 | 9.768669 | 35.542503 |
| 2021-03-09 18:52:00 | 53814.000000 | 53828.0 | 53814.000000 | 53828.0 | 0.016450 | -8.313851 | 27.838845 | 36.503753 |
| 2021-03-09 18:53:00 | 53903.000000 | 53910.0 | 53902.000000 | 53910.0 | 0.012500 | 1.825112 | 60.000000 | 39.753485 |
| 2021-03-09 18:54:00 | 53930.366010 | 53958.0 | 53930.366010 | 53953.0 | 1.040000 | 11.233655 | 83.144342 | 40.342522 |
454041 rows × 8 columns
from tqdm import tqdm
from src.prepare_datasets import normalize_row

# Enable tqdm's pandas integration so progress_apply is available; the
# description labels the progress bar.
tqdm.pandas(desc="test dataset")

# Apply normalize_row to each row (axis=1) while reporting progress.
test_norm = test.progress_apply(normalize_row, axis=1)

# Plot every 60th normalised row, with subplots enabled.
test_norm.iloc[::60].iplot(subplots=True)
test dataset: 100%|██████████| 454041/454041 [03:18<00:00, 2282.29it/s]
# Re-select the normalised columns in the same order as the source frame.
column_order = test.columns.tolist()
test_norm = test_norm[column_order]
# Plot every 60th row again, now with the columns in the original order.
test_norm.iloc[::60].iplot(subplots=True)
Check that denormalisation works correctly.
from src.prepare_datasets import denormalise_row

# Relabel the tqdm progress bar for this second pass.
tqdm.pandas(desc="test norm")

# Apply denormalise_row to each normalised row (axis=1) while reporting progress.
# The result is meant to be compared against the original `test` frame.
test_denorm = test_norm.progress_apply(denormalise_row, axis=1)

# Plot every 60th denormalised row, with subplots enabled.
test_denorm.iloc[::60].iplot(subplots=True)
test norm: 100%|██████████| 454041/454041 [03:25<00:00, 2211.54it/s]
# Show the first rows of the raw, normalised and denormalised frames so the
# round trip can be compared by eye. All three are rendered because
# InteractiveShell.ast_node_interactivity is set to 'all' during setup.
# NOTE(review): the recorded output for this cell ends in
# "NameError: name 'test_denorm' is not defined", so the cell was last run
# before the denormalisation cell had produced test_denorm — re-run in order.
test.head()
test_norm.head()
test_denorm.head()
| open | high | low | close | volume | MACD | Stochastics Oscillator | ATR | |
|---|---|---|---|---|---|---|---|---|
| timestamp | ||||||||
| 2020-04-22 13:16:00 | 6964.000000 | 6964.0 | 6963.977331 | 6964.0 | 5.338192 | -0.354599 | 82.026144 | 1.579205 |
| 2020-04-22 13:17:00 | 6964.000000 | 6980.0 | 6963.999575 | 6980.0 | 27.874409 | 0.692947 | 88.235294 | 2.609292 |
| 2020-04-22 13:18:00 | 6978.775679 | 6991.6 | 6978.775679 | 6991.6 | 26.115153 | 2.031269 | 94.117647 | 3.338937 |
| 2020-04-22 13:19:00 | 6991.600000 | 7025.7 | 6990.700000 | 7013.8 | 165.308843 | 4.153526 | 94.341417 | 5.600442 |
| 2020-04-22 13:20:00 | 7014.600000 | 7050.0 | 7013.900000 | 7035.4 | 69.579311 | 6.608127 | 89.186050 | 7.786124 |
| open | high | low | close | volume | MACD | Stochastics Oscillator | ATR | |
|---|---|---|---|---|---|---|---|---|
| timestamp | ||||||||
| 2020-04-22 13:16:00 | 0.070034 | 0.070034 | 0.070034 | 0.070034 | 0.000795 | -0.001262 | 0.820261 | 0.005814 |
| 2020-04-22 13:17:00 | 0.070034 | 0.070195 | 0.070034 | 0.070195 | 0.004150 | 0.002466 | 0.882353 | 0.009606 |
| 2020-04-22 13:18:00 | 0.070183 | 0.070312 | 0.070183 | 0.070312 | 0.003888 | 0.007229 | 0.941176 | 0.012292 |
| 2020-04-22 13:19:00 | 0.070312 | 0.070655 | 0.070303 | 0.070535 | 0.024609 | 0.014782 | 0.943414 | 0.020617 |
| 2020-04-22 13:20:00 | 0.070543 | 0.070899 | 0.070536 | 0.070752 | 0.010358 | 0.023518 | 0.891860 | 0.028663 |
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-47-c129c68fa13f> in <module> 1 test.head() 2 test_norm.head() ----> 3 test_denorm.head() NameError: name 'test_denorm' is not defined
from src.libs import load
import tensorflow as tf

# Load the model via the project helper.
model = load()

# Cut the normalised frame into consecutive windows of 32 rows. The stride
# equals the window length, so windows do not overlap; shuffle=False keeps
# them in order, and they are grouped 8 per batch. No targets are attached.
ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    test_norm,
    targets=None,
    sequence_length=32,
    sequence_stride=32,
    shuffle=False,
    batch_size=8,
)

# Take only the first batch. It is deliberately NOT called `input`: binding
# that name at notebook level hides the builtin input() for the rest of the
# session.
first_batch = next(iter(ds))
len(first_batch)
predictions = model.predict(first_batch)
len(predictions)
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm (LSTM) (8, 32, 32) 5248 _________________________________________________________________ dense (Dense) (8, 32, 1) 33 ================================================================= Total params: 5,281 Trainable params: 5,281 Non-trainable params: 0 _________________________________________________________________
8
WARNING:tensorflow:6 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fe8f47d6048> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
8
predictions.shape
# Flatten the predictions into a single 1-D series (one value per row).
# `predictions` is already a NumPy array, so it can be reshaped directly
# without a round trip through TensorFlow.
output = pd.Series(predictions.reshape(-1))
# Label each prediction with the timestamp of the row it belongs to. The
# number of rows is taken from the predictions themselves instead of a
# hard-coded 256, so changing batch size or window length cannot leave the
# index out of step with the data.
output.index = test_norm.index[:len(output)]
output.iplot(subplots=True)
(8, 32, 1)
# Compare predictions with the normalised close over exactly the rows that
# have a prediction. The row count comes from `output` rather than a
# hard-coded 256, so it follows the batch size and window length.
predicted_rows = test_norm.iloc[:len(output)]
predicted2norm = pd.DataFrame({
    'predicted': output,
    'real': predicted_rows['close'],
})
predicted2norm.index = predicted_rows.index
predicted2norm.iplot(subplots=True)
from src.prepare_datasets import norm_d
# Display the per-column values held in norm_d.
norm_d
# Multiply every prediction by the 'close' entry of norm_d; the entry is
# looked up once instead of on every element.
# NOTE(review): presumably this inverts the scaling done by normalize_row for
# the close column — confirm in src.prepare_datasets.
close_scale = norm_d['close']
predicted_denorm = output.apply(lambda value: value * close_scale)
predicted_denorm.iplot()
high 99436.760000 low 99437.010000 open 99436.900000 close 99437.000000 volume 6717.516714 MACD 280.978824 Stochastics Oscillator 100.000000 ATR 271.641687 dtype: float64
# Compare the denormalised predictions with the real close over exactly the
# rows that have a prediction. The row count comes from `predicted_denorm`
# rather than a hard-coded 256, so it follows the batch size and window length.
actual_rows = test.iloc[:len(predicted_denorm)]
predicted2real = pd.DataFrame({
    'predicted': predicted_denorm,
    'real': actual_rows['close'],
})
predicted2real.index = actual_rows.index
predicted2real.iplot(subplots=True)